
* Start from an empty dataset in memory
clear
* Do not pause the output at --more-- prompts while the do-file runs
set more off

/*********************************************************************************
Name: hw_summary_stats.do

Data In: [Data/Original/hw_surveys.dta, 
		  Data/Intermediate/hw_roster_by_hw.dta,
		  Data/Original/hw_tech_surveys.dta,
		  Data/Intermediate/hw_controls.dta,
		  Data/Original/centerwise_sheets.dta,
		  Data/Intermediate/hw_roster_by_center.dta,
		  Data/Original/observation_days_baseline.dta]

Data Out: [Data/Intermediate/hw_covariates.dta,
		   Data/Intermediate/baseline_opasha_visit.dta,
		   Data/Intermediate/balance_check_Table1_PanelC1.dta,
		   Data/Intermediate/balance_check_Table1_PanelC2.dta]

Results Out: [Results/Paper/Table1_PanelA.out,
		 	  Results/Appendix/TableA1_PanelA.out, 
			  Results/Appendix/TableA1_PanelB_1.out,
			  Results/Appendix/TableA1_PanelB_5.out,
			  Results/Paper/Table1_PanelB.out,
			  Results/Appendix/TableA1_PanelB_2.out,
			  Results/Paper/Table1_PanelC_1.out,
			  Results/Paper/Table1_PanelC_2.out,
			  Results/Appendix/TableA1_PanelB_3.out,
			  Results/Appendix/TableA1_PanelB_4.out]

Purpose of do-file: Generating descriptive statistics and conducting balance checks on health workers' characteristics

Organization: PART-1: Checking the balance in health workers' characteristics across the treatment and control groups
			  PART-2: Checking the balance in center‐wise data sheets outcomes across the treatment and control groups
			  PART-3: Checking the balance in observation days outcomes across the treatment and control groups
*********************************************************************************/

* Setting path directory
* All file paths below are relative to the folder held in the global macro DIRECTORY.
* DIRECTORY is not defined in this do-file; it is expected to be set before this file is run.
cd "${DIRECTORY}"


****************************************
*** PART-1 *** Descriptive statistics and balance checks for health workers' characteristics
****************************************

** Calling and merging datasets
* The health worker survey is the base file; the roster, the technology survey and the
* control variables are attached by Unique_ID. After each merge the number of observations
* that did not match is displayed, and the merge indicator is discarded (no observation is dropped).

use "Data/Original/hw_surveys.dta", clear

* Health worker roster
merge 1:1 Unique_ID using "Data/Intermediate/hw_roster_by_hw.dta"
count if _merge != 3
drop _merge

* Technology survey
merge 1:1 Unique_ID using "Data/Original/hw_tech_surveys.dta"
count if _merge != 3
drop _merge

* Health worker controls (several records in memory may share one record of the controls file)
merge m:1 Unique_ID using "Data/Intermediate/hw_controls.dta"
count if _merge != 3
drop _merge


** Creating the variables

* Gender: indicator for b_sex == 1, left missing when b_sex is missing
tabulate b_sex, missing
gen male = (b_sex == 1) if b_sex != .
label variable male "Gender (male)"

* Age
label variable b_age "Age"

* Caste: one indicator per code 1-5 of b_caste, left missing when b_caste is missing
tabulate b_caste, missing
local code = 0
foreach grp in general obc sc st minority {
	local ++code
	gen caste_`grp' = (b_caste == `code') if b_caste != .
}

* Religion: codes 1 and 2 get their own indicator, every other non-missing code is "other"
tabulate b_religion, missing
local code = 0
foreach grp in hindu muslim {
	local ++code
	gen religion_`grp' = (b_religion == `code') if b_religion != .
}
gen religion_oth = (b_religion < 0 | b_religion > 2) if b_religion != .

* Highest education level achieved
* Start from the reported level, then overwrite it using the degree currently pursued
* (b_current_degree / b_current_degree_oth); the order of the replacements matters.
tabulate b_highest_edu
gen b_highest_edu_v2 = b_highest_edu
replace b_highest_edu_v2 = b_current_degree - 1 if inlist(b_current_degree, 2, 3, 4, 7, 8, 9)
replace b_highest_edu_v2 = b_current_degree - 2 if b_current_degree == 6
replace b_highest_edu_v2 = 9 if b_current_degree_oth == 22
replace b_highest_edu_v2 = 4 if b_current_degree_oth == 19
* Special codes -555 and -111 are treated as missing
replace b_highest_edu_v2 = . if inlist(b_highest_edu_v2, -555, -111)

* One dummy per education category (edu1, edu2, ...); the grouping below relies on exactly ten categories
tabulate b_highest_edu_v2, gen(edu)

gen otherdip_nonform = edu1 + edu10
label variable otherdip_nonform "Other diploma/non-formal"
gen twelveandbelow = edu2 + edu3 + edu4 + edu5
label variable twelveandbelow "Twelve and below"
gen tertiary = edu6 + edu7 + edu8 + edu9
label variable tertiary "Under three years of university"

* The category dummies and the working variable are no longer needed
drop edu1-edu10 b_highest_edu_v2

* Number of years of work experience: special codes -888 and -999 are treated as missing
replace b_inc_yy = . if inlist(b_inc_yy, -888, -999)

* Previous job related to TB
* NOTE(review): a missing b_tb_job is coded 0 here, unlike most indicators above - confirm this is intended
tabulate b_tb_job, missing
gen prev_job_TB = b_tb_job == 1
label variable prev_job_TB "Have you ever had a job related to TB before OA?"

* Social sector job: missing when b_work_sector is missing, -111 or -999
tabulate b_work_sector, missing
gen social_sector = (b_work_sector == 2) if b_work_sector != . & !inlist(b_work_sector, -111, -999)
label variable social_sector "Any previous experience in the social/NGO sector"

* Other income generating activities: missing only for code -888
* NOTE(review): a missing b_inc_gen_act is coded 0 - confirm this is intended
tabulate b_inc_gen_act, missing
gen other_income = (b_inc_gen_act == 1) if b_inc_gen_act != -888
label variable other_income "Do you have any other income generating activities in addition to OA?"

* Lives in the same neighbourhood as at least one center
* 1 if either center is in the same neighbourhood; 0 if center 1 is not and center 2 is either not or coded -555; missing otherwise
gen same_nbhd = 1 if b_center1_same_nbhd == 1 | b_center2_same_nbhd == 1
replace same_nbhd = 0 if b_center1_same_nbhd == 0 & inlist(b_center2_same_nbhd, 0, -555)
label variable same_nbhd "Health worker lives in same area as atleast 1 center"

* Household size
* hhd_size is b_hh_size plus one when b_hh_size is positive, and is set to 1 for health workers living alone
tabulate b_live_alone b_hh_size
gen live_alone = b_live_alone if b_live_alone >= 0
gen hhd_size = b_hh_size + 1 if b_hh_size > 0
replace hhd_size = 1 if live_alone == 1

* Household asset indicators
* Each indicator is 1 when the survey answer equals 1 and 0 otherwise; the listed special codes are then set to missing.

* Electricity (special codes -111 and -888)
gen elec = (b_electricity == 1)
replace elec = . if inlist(b_electricity, -111, -888)
label variable elec "Has electricty"

* Tap water (special codes -111 and -888)
gen tap = (b_tapwater == 1)
replace tap = . if inlist(b_tapwater, -111, -888)
label variable tap "Has tap water"

* Television (any negative code)
gen tv = (b_tv == 1)
replace tv = . if b_tv < 0
label variable tv "Has TV"

* Refrigerator (any negative code)
gen fridge = (b_fridge == 1)
replace fridge = . if b_fridge < 0
label variable fridge "Has Refrigerator"

* Rents an apartment or house to third party (special codes -111 and -888)
gen rent_thirdparty = (b_rent == 1)
replace rent_thirdparty = . if inlist(b_rent, -111, -888)
label variable rent_thirdparty "Rents an apartment or house to a third party"

* Owns house (no special code is set to missing for this variable)
gen ownhouse = (b_own_house == 1)
label variable ownhouse "Owns house"

* Technology use variables: the special code -111 is recoded to missing
foreach techvar of varlist comp_know net_know email_acc socnet_acc {
	replace `techvar' = . if `techvar' == -111
}

* Applying labels
label variable b_jobs_bef_oa "Any previous work experience"
label variable comp_know "Knows how to use a computer"
label variable net_know "Knows how to use the internet"
label variable email_acc "Has an email account"
label variable socnet_acc "Has a social networking account"
label variable nbdays_inexp "Days spent in the experiment"


** Creating globals holding the variable lists used in the analysis

* Social background of health workers (ten variables)
global bl_social "male b_age caste_general caste_obc caste_sc caste_st caste_minority religion_hindu religion_muslim religion_oth"

* Education level of health workers (three variables)
global bl_education "twelveandbelow tertiary otherdip_nonform"

* Job history of health workers (five variables)
global bl_jobhistory "b_jobs_bef_oa b_inc_yy prev_job_TB social_sector other_income"

* Household background of health workers (nine variables)
global bl_household "same_nbhd hhd_size live_alone elec tap tv fridge rent_thirdparty ownhouse"

* Technical exposure of health workers (five variables)
global bl_techexp "comp_know net_know email_acc socnet_acc nbdays_inexp"

* Health worker controls: nine *_couns variables plus their nine *_dum_couns companions
* (NOTE(review): the *_dum_* variables are presumably missing-value flags - confirm where hw_controls.dta is built)
global bl_hw_controls "b_age_couns b_jobs_bef_oa_couns male_couns gen_caste_couns hindu_couns twelveandbelow_couns tertiary_couns hhd_size_couns ownhouse_couns b_age_dum_couns b_jobs_bef_oa_dum_couns male_dum_couns gen_caste_dum_couns hindu_dum_couns twelveandbelow_dum_couns tertiary_dum_couns hhd_size_dum_couns ownhouse_dum_couns"


** Creating dataset with all health worker level covariates for the analysis predicting the likelihood to default based on health worker characteristics, in defaults.do

* The data in memory are restored afterwards, so the saved file is only a side product
preserve

	keep Unique_ID $bl_social $bl_education $bl_jobhistory $bl_household $bl_techexp
	save "Data/Intermediate/hw_covariates.dta", replace

restore


** Conducting balance checks on all initial health workers (without the replacements): Table 1, Panel A

* Result columns: row k of these variables holds the statistics of the k-th variable tested
gen var = ""
foreach stat in diff p control_mean control_sd treatment_mean treatment_sd fullsample_mean fullsample_sd N treatment_control {
	gen `stat' = .
}

* Work on the initial health workers only; the full data are restored afterwards
preserve
keep if replacement == 0

* Number of variables tested, i.e. number of rows exported
local vars = 32

local row = 0
foreach v in $bl_social $bl_education $bl_jobhistory $bl_household $bl_techexp {
	local ++row
	replace var = "`v'" if _n == `row'

	* Treatment coefficient and its p-value, controlling for the strata dummies, SEs clustered by uid_cluster
	ivreg2 `v' treatment final_stratum_id1-final_stratum_id13 if replacement == 0, cl(uid_cluster) small
	matrix beta = e(b)
	replace diff = beta[1,1] if _n == `row'
	test treatment = 0
	replace p = r(p) if _n == `row'

	* Control group mean and standard deviation
	summarize `v' if treatment == 0 & replacement == 0
	replace control_mean = r(mean) if _n == `row'
	replace control_sd = r(sd) if _n == `row'

	* Treatment group mean and standard deviation
	summarize `v' if treatment == 1 & replacement == 0
	replace treatment_mean = r(mean) if _n == `row'
	replace treatment_sd = r(sd) if _n == `row'

	* Raw difference in means (recomputed for every row; rows not yet filled stay missing)
	replace treatment_control = treatment_mean - control_mean

	* Full sample size, mean and standard deviation
	summarize `v' if treatment == 0 | treatment == 1
	replace N = r(N) if _n == `row'
	replace fullsample_mean = r(mean) if _n == `row'
	replace fullsample_sd = r(sd) if _n == `row'
}

* The variable list must contain exactly `vars' names
assert `row' == `vars'

* Round the statistics to three decimals
* NOTE(review): N and treatment_control lie outside this variable range, so treatment_control is exported unrounded
foreach stat of varlist diff-fullsample_sd {
	replace `stat' = round(`stat', 0.001)
}

outsheet var fullsample_mean fullsample_sd N control_mean control_sd treatment_mean treatment_sd treatment_control p if _n<=`vars' using "Results/Paper/Table1_PanelA.out", replace

restore

* Joint orthogonality test on characteristics predicting default (as identified in defaults.do): reported in Section 3.3
* Treatment is regressed on the strata dummies and the predictors (initial health workers only),
* then the predictors are tested jointly.

local default_predictors tertiary b_inc_yy prev_job_TB other_income fridge rent_thirdparty ownhouse

ivreg2 treatment final_stratum_id1-final_stratum_id13 `default_predictors' if replacement == 0, cl(uid_cluster) small
test `default_predictors'


** Checking that attrition is not differential across treatment and control

* Likelihood to attrit: Table A1, Panel A

* Attrition rate in the control group, reported at the bottom of both columns
summarize attritor if treatment == 0
local mean_control = r(mean)

* Column 1: strata fixed effects only
ivreg2 attritor treatment final_stratum_id1-final_stratum_id13, small cl(uid_cluster)
quietly outreg2 treatment using "Results/Appendix/TableA1_PanelA.out", replace ///
	nolabel asterisk(se) nocons nonote se ///
	addstat("Mean in Control Group", `mean_control') ///
	addtext(Strata fixed effects, Yes) adec(3) dec(3) keep(treatment)

* Column 2: strata fixed effects and health worker controls
ivreg2 attritor treatment final_stratum_id1-final_stratum_id13 $bl_hw_controls, small cl(uid_cluster)
quietly outreg2 treatment using "Results/Appendix/TableA1_PanelA.out", append ///
	nolabel asterisk(se) nocons nonote se ///
	addstat("Mean in Control Group", `mean_control') ///
	addtext(Strata fixed effects, Yes, Health worker controls, Yes) adec(3) dec(3) keep(treatment)

* Attritors' characteristics: Table A1, Panel B, columns 1-6, first set of variables
* Uses the result columns (var, diff, p, ...) created earlier in this file; rows 1 to `vars' are overwritten.

* Number of variables tested, i.e. number of rows exported
local vars = 29

local row = 0
foreach v in male b_age caste_general caste_obc caste_sc caste_st ///
		religion_hindu religion_muslim religion_oth twelveandbelow tertiary ///
		b_jobs_bef_oa b_inc_yy prev_job_TB other_income $bl_household $bl_techexp {
	local ++row
	replace var = "`v'" if _n == `row'

	* Treatment-control difference among attritors (no strata dummies), SEs clustered by uid_cluster
	ivreg2 `v' treatment if attritor == 1, cl(uid_cluster) small
	replace N = e(N) if _n == `row'
	matrix beta = e(b)
	replace diff = beta[1,1] if _n == `row'
	test treatment = 0
	replace p = r(p) if _n == `row'

	* Control group attritors
	summarize `v' if treatment == 0 & attritor == 1
	replace control_mean = r(mean) if _n == `row'
	replace control_sd = r(sd) if _n == `row'

	* Treatment group attritors
	summarize `v' if treatment == 1 & attritor == 1
	replace treatment_mean = r(mean) if _n == `row'
	replace treatment_sd = r(sd) if _n == `row'
}

* The variable list must contain exactly `vars' names
assert `row' == `vars'

* Round the statistics to three decimals
foreach stat of varlist diff-treatment_sd {
	replace `stat' = round(`stat', 0.001)
}

outsheet var control_mean control_sd treatment_mean treatment_sd p N if _n<=`vars' using "Results/Appendix/TableA1_PanelB_1.out", replace

* Confirming no difference in health workers across treatment and control for excluded variables above
* Summary statistics are displayed for control (0) and then treatment (1) attritors

foreach v in otherdip_nonform social_sector caste_minority {
	forvalues arm = 0/1 {
		summarize `v' if treatment == `arm' & attritor == 1
	}
}

** Checking that characteristics of health workers who join are not differential across treatment and control

* Characteristics of health workers who join: Table A1, Panel B, columns 7-12
* Uses the result columns (var, diff, p, ...) created earlier in this file; rows 1 to `vars' are overwritten.
* The cleaned indicator live_alone (negative codes set to missing) is tested here, as in the
* attritor columns of the same table, instead of the raw survey variable b_live_alone.

* Number of variables tested, i.e. number of rows exported
local vars = 28

local row = 0
foreach v in male b_age caste_general caste_obc caste_sc caste_minority ///
		religion_hindu religion_muslim religion_oth twelveandbelow tertiary ///
		b_jobs_bef_oa b_inc_yy prev_job_TB social_sector ///
		same_nbhd hhd_size live_alone tap tv fridge rent_thirdparty ownhouse $bl_techexp {
	local ++row
	replace var = "`v'" if _n == `row'

	* Treatment-control difference among replacement health workers (no strata dummies), SEs clustered by uid_cluster
	ivreg2 `v' treatment if replacement == 1, cl(uid_cluster) small
	replace N = e(N) if _n == `row'
	matrix beta = e(b)
	replace diff = beta[1,1] if _n == `row'
	test treatment = 0
	replace p = r(p) if _n == `row'

	* Control group replacements
	summarize `v' if treatment == 0 & replacement == 1
	replace control_mean = r(mean) if _n == `row'
	replace control_sd = r(sd) if _n == `row'

	* Treatment group replacements
	summarize `v' if treatment == 1 & replacement == 1
	replace treatment_mean = r(mean) if _n == `row'
	replace treatment_sd = r(sd) if _n == `row'
}

* The variable list must contain exactly `vars' names
assert `row' == `vars'

* Round the statistics to three decimals
foreach stat of varlist diff-treatment_sd {
	replace `stat' = round(`stat', 0.001)
}

outsheet var control_mean control_sd treatment_mean treatment_sd p N if _n<=`vars' using "Results/Appendix/TableA1_PanelB_5.out", replace

* Confirming no difference in health workers across treatment and control for excluded variables above
* Summary statistics are displayed for control (0) and then treatment (1) replacements

foreach v in otherdip_nonform elec caste_st other_income {
	forvalues arm = 0/1 {
		summarize `v' if treatment == `arm' & replacement == 1
	}
}


****************************************
*** PART-2 *** Descriptive statistics and balance checks for center‐wise data sheets outcomes
****************************************

** Balance checks on center-wise data sheets outcomes

* Creating the variables

use "Data/Original/centerwise_sheets.dta", clear

* Attach the center roster; the merge result is only tabulated, no observation is dropped on it
merge m:1 UID_Center using "Data/Intermediate/hw_roster_by_center.dta"
tabulate _merge

* Keep rows with MonthIntoExp strictly between 100 and 107
* NOTE(review): presumably these codes identify the baseline months - confirm the coding of MonthIntoExp
keep if MonthIntoExp>100 & MonthIntoExp<107

* Default as a share of total outcomes, set to 0 when there is no outcome
gen Default_2 = cond(TotOutcome == 0, 0, Default / TotOutcome)

* Cluster-level baseline averages
foreach outcome in NewPatients Default_2 {
	bysort uid_cluster: egen baseline_`outcome' = mean(`outcome')
}
by uid_cluster: gen id = _n

// Mobile health workers have in general two lines, one per area, but that's not the case for two centers: 132391 and 132392, for which we only know the aggregate over both areas; thus need to divide by 2 to get the average per center
foreach outcome in baseline_NewPatients baseline_Default_2 {
	replace `outcome' = `outcome' / 2 if inlist(UID_Center, 132391, 132392)
}

* One row per cluster
keep if id == 1

* Regression: Table 1, Panel B

* Result columns: row k of these variables holds the statistics of the k-th variable tested
gen var = ""
foreach stat in diff p control_mean control_sd treatment_mean treatment_sd fullsample_mean fullsample_sd N treatment_control {
	gen `stat' = .
}

* Number of variables tested, i.e. number of rows exported
local vars = 2

local row = 0
foreach v in baseline_NewPatients baseline_Default_2 {
	local ++row
	replace var = "`v'" if _n == `row'

	* Treatment coefficient and its p-value, controlling for the strata dummies, SEs clustered by uid_cluster
	ivreg2 `v' treatment final_stratum_id1-final_stratum_id13, cl(uid_cluster) small
	matrix beta = e(b)
	replace diff = beta[1,1] if _n == `row'
	test treatment = 0
	replace p = r(p) if _n == `row'

	* Control group mean and standard deviation
	summarize `v' if treatment == 0
	replace control_mean = r(mean) if _n == `row'
	replace control_sd = r(sd) if _n == `row'

	* Treatment group mean and standard deviation
	summarize `v' if treatment == 1
	replace treatment_mean = r(mean) if _n == `row'
	replace treatment_sd = r(sd) if _n == `row'

	* Raw difference in means (recomputed for every row; rows not yet filled stay missing)
	replace treatment_control = treatment_mean - control_mean

	* Full sample size, mean and standard deviation
	summarize `v' if treatment == 0 | treatment == 1
	replace N = r(N) if _n == `row'
	replace fullsample_mean = r(mean) if _n == `row'
	replace fullsample_sd = r(sd) if _n == `row'
}

* The variable list must contain exactly `vars' names
assert `row' == `vars'

* Round the statistics to three decimals
* NOTE(review): N and treatment_control lie outside this variable range, so treatment_control is exported unrounded
foreach stat of varlist diff-fullsample_sd {
	replace `stat' = round(`stat', 0.001)
}

outsheet var fullsample_mean fullsample_sd N control_mean control_sd treatment_mean treatment_sd treatment_control p if _n<=`vars' using "Results/Paper/Table1_PanelB.out", replace

* Joint orthogonality test: reported in Section 3.3
* Treatment is regressed on the two baseline outcomes and the strata dummies, first linearly
* and then with a probit; the two baseline outcomes are tested jointly after each model.

local baseline_outcomes baseline_NewPatients baseline_Default_2

ivreg2 treatment `baseline_outcomes' final_stratum_id1-final_stratum_id13, cl(uid_cluster) small
test `baseline_outcomes'

probit treatment `baseline_outcomes' final_stratum_id1-final_stratum_id13, vce(cluster uid_cluster)
test `baseline_outcomes'


** Checking that attrition is not differential across treatment and control

* Creating the variables
* The center-wise sheets are reloaded and the cluster-level baseline outcomes are rebuilt from scratch.

use "Data/Original/centerwise_sheets.dta", clear

* Attach the center roster; the merge result is only tabulated, no observation is dropped on it
merge m:1 UID_Center using "Data/Intermediate/hw_roster_by_center.dta"
tabulate _merge

* Keep rows with MonthIntoExp strictly between 100 and 107
* NOTE(review): presumably these codes identify the baseline months - confirm the coding of MonthIntoExp
keep if MonthIntoExp>100 & MonthIntoExp<107

* Default as a share of total outcomes, set to 0 when there is no outcome
gen Default_2 = cond(TotOutcome == 0, 0, Default / TotOutcome)

* Cluster-level baseline averages
foreach outcome in NewPatients Default_2 {
	bysort uid_cluster: egen baseline_`outcome' = mean(`outcome')
}
by uid_cluster: gen id = _n

// Mobile health workers have in general two lines, one per area, but that's not the case for two centers: 132391 and 132392, for which we only know the aggregate over both areas; thus need to divide by 2 to get the average per center
foreach outcome in baseline_NewPatients baseline_Default_2 {
	replace `outcome' = `outcome' / 2 if inlist(UID_Center, 132391, 132392)
}

* Regression: Table A1, Panel B, columns 1-6, second set of variables

* One row per cluster
keep if id == 1

* Result columns: row k of these variables holds the statistics of the k-th variable tested
gen var = ""
foreach stat in diff p control_mean control_sd treatment_mean treatment_sd fullsample_mean fullsample_sd N {
	gen `stat' = .
}

* Number of variables tested, i.e. number of rows exported
local vars = 2

local row = 0
foreach v in baseline_NewPatients baseline_Default_2 {
	local ++row
	replace var = "`v'" if _n == `row'

	* Treatment-control difference among clusters with some attrition (no strata dummies), SEs clustered by uid_cluster
	ivreg2 `v' treatment if some_attrition == 1, cl(uid_cluster) small
	replace N = e(N) if _n == `row'
	matrix beta = e(b)
	replace diff = beta[1,1] if _n == `row'
	test treatment = 0
	replace p = r(p) if _n == `row'

	* Control clusters with some attrition
	summarize `v' if treatment == 0 & some_attrition == 1
	replace control_mean = r(mean) if _n == `row'
	replace control_sd = r(sd) if _n == `row'

	* Treatment clusters with some attrition
	summarize `v' if treatment == 1 & some_attrition == 1
	replace treatment_mean = r(mean) if _n == `row'
	replace treatment_sd = r(sd) if _n == `row'
}

* The variable list must contain exactly `vars' names
assert `row' == `vars'

* Round the statistics to three decimals (the full-sample columns are never filled here and stay missing)
foreach stat of varlist diff-fullsample_sd {
	replace `stat' = round(`stat', 0.001)
}

outsheet var control_mean control_sd treatment_mean treatment_sd p N if _n<=`vars' using "Results/Appendix/TableA1_PanelB_2.out", replace


****************************************
*** PART-3 *** Descriptive statistics and balance checks for observation days outcomes
****************************************

** Creating the variables

use "Data/Original/observation_days_baseline.dta", clear

* Attach the center roster and keep matched observations only.
* The merge indicator is named _mergeCenterData and is referenced by its full name, so the
* filter neither depends on variable-name abbreviation nor picks up another variable called _merge.
merge m:1 UID_Center using "Data/Intermediate/hw_roster_by_center.dta", gen(_mergeCenterData)
keep if _mergeCenterData == 3
drop _mergeCenterData

* Creating variable for row-number within each monitoring instance
bys unique_mon_instance (patient_serial): gen row_id = _n

* Checking that observation days are before the experiment start date
* (one row per monitoring instance is summarized; positive values mean the visit precedes the start date)
gen daysfromexpstart = expstartdate - visit_date
sum daysfromexpstart if row_id == 1, det
drop daysfromexpstart

* Checking that observation days are primarily before biometric devices are installed
* (treatment group only; positive values mean the visit precedes dateinstalled)
gen daysbeforeinstallation = dateinstalled - visit_date
sum daysbeforeinstallation if row_id == 1 & treatment == 1, det
drop daysbeforeinstallation

* Constructing time variables
* The hour/minute components are first renamed so that the _hh/_mm suffix comes last
rename (cntr_*_hh_?m cntr_*_mm_?m) (cntr_*_?m_hh cntr_*_?m_mm) 
* NOTE(review): these recodes presumably convert afternoon/evening hours entered on a 12-hour clock to the 24-hour clock - confirm
recode cntr_closing_time_am_hh (1=13) (2=14) (6=18)
recode cntr_closing_time_pm_hh (7=19) (8=20) (9=21)
recode cntr_open_time_pm_hh (1=13) (5=17) (6=18)
recode couns_deprt_time_hh (1=13) (2=14) 

foreach var in cntr_open_time_am cntr_closing_time_am cntr_open_time_pm cntr_closing_time_pm couns_starttime couns_deprt_time cdp_starttime cdp_deprt_time pat_arr_time {
				
	* Cleaning hour and minute variables: hour 24 becomes 0, hours outside 0-23 become missing,
	* and minutes outside 0-59 (including missing minutes) are set to 0
	replace `var'_hh = 0 if `var'_hh == 24
	replace `var'_hh = . if !inrange(`var'_hh,0,23)
	replace `var'_mm = 0 if !inrange(`var'_mm,0,59)
	
	* Number of missing values in each component, used in the consistency check below
	foreach tt in hh mm {
		
		qui count if missing(`var'_`tt')
		local miss_`var'_`tt' = `r(N)'
	
	}
	
	* hms() returns milliseconds since midnight; %tc values are stored as double, as Stata requires for datetime/c variables
	gen double `var' = hms(`var'_hh,`var'_mm,0)
	format `var' %tcHH:MM
	
	qui count if missing(`var')
	local miss_`var' = `r(N)'
	
	* The combined time must be missing exactly as often as its more-often-missing component
	assert `miss_`var'' == max(`miss_`var'_hh',`miss_`var'_mm')
	
	* Place the hour and the combined time right after the minute variable
	order `var'_hh `var',after(`var'_mm)
}

* Generating adherence metrics
* [adherence_1] Patient picked pill directly
* 1 when the patient took or picked pills; 0 on patient rows where both answers are 0;
* missing whenever we don't know definitively that the patient did not take or pick a pill
gen adherence_1 = 1 if pat_took_pills_yn == 1 | pat_picked_pills_yn == 1
replace adherence_1 = 0 if patient_row == 1 & pat_took_pills_yn == 0 & pat_picked_pills_yn == 0

* [adherence_2] adherence_1 + relative picked a pill on patient's behalf + patient or relative was met during a home visit
gen adherence_2 = adherence_1
replace adherence_2 = 1 if reltv_picked_pills_yn == 1 | couns_home_vist_yn == 1
* Where adherence_1 is 0 but the relative or home-visit answer is unknown (-222 or missing), adherence_2 is set to missing
replace adherence_2 = . if adherence_1 == 0 & (inlist(reltv_picked_pills_yn,-222,.) | inlist(couns_home_vist_yn,-222,.))
// Logically, we cannot code adherence_2 = 0 if adherence_1 == MISSING (if we don't definitively know that the patient did not take their pill, we cannot definitively know that neither patient nor relative took a pill)
// However, where we know adherence_1 == 0, we might not be able to definitively say that a relative did not show up either (and same for home visit), and hence adherence_2 == .

* Constructing variable indicating whether center was open (missing when the morning answer is -222)
gen center_open_yn = (center_open_yn_am == 1) if center_open_yn_am != -222

* Constructing center open duration variables, in minutes (time variables are in milliseconds)
* A session counts as 0 minutes when either of its times is missing; negative durations are set to missing
foreach session in am pm {
	gen cntr_open_duration_`session' = cond(!missing(cntr_closing_time_`session', cntr_open_time_`session'), (cntr_closing_time_`session' - cntr_open_time_`session')/(1000*60), 0)
	replace cntr_open_duration_`session' = . if cntr_open_duration_`session' < 0
}

* Minutes by which the morning closing time exceeds the afternoon opening time (0 when there is no overlap or a time is missing)
gen cntr_mid_day_overlap = cond(!missing(cntr_closing_time_am, cntr_open_time_pm), (cntr_closing_time_am - cntr_open_time_pm)/(1000*60), 0)
replace cntr_mid_day_overlap = 0 if cntr_mid_day_overlap < 0

* Total minutes open over the day, net of the mid-day overlap
gen cntr_open_duration_day = cntr_open_duration_am + cntr_open_duration_pm - cntr_mid_day_overlap
replace cntr_open_duration_day = . if cntr_open_duration_day < 0

* Adjusting center open frequency and duration based on other data

* A center with at least one recorded opening or closing time is counted as open
replace center_open_yn = 1 if !(missing(cntr_open_time_am) & missing(cntr_closing_time_am) & missing(cntr_open_time_pm) & missing(cntr_closing_time_pm))

* Duration is unknown when the open status is unknown, and 0 when the center was closed
replace cntr_open_duration_day = . if center_open_yn == .
replace cntr_open_duration_day = 0 if center_open_yn == 0

* Cleaning variable indicating visit by Operation ASHA
* Negative codes become missing; a missing answer is then set to 0 whenever the center open status is known
tabulate opasha_visit_yn center_open_yn, missing
tabulate opasha_visit_yn couns_present_yn, missing
replace opasha_visit_yn = . if opasha_visit_yn < 0
replace opasha_visit_yn = 0 if center_open_yn != . & opasha_visit_yn == .

* Cleaning variable indicating whether health worker was present (-222 is treated as missing)
replace couns_present_yn = . if couns_present_yn == -222

* Constructing health worker duration variable, in minutes (time variables are in milliseconds)
* 0 when either time is missing; negative durations are set to missing
gen couns_duration = cond(!missing(couns_deprt_time, couns_starttime), (couns_deprt_time - couns_starttime)/(1000*60), 0)
replace couns_duration = . if couns_duration < 0

* Adjusting health worker present frequency and duration based on other data

* A health worker with a recorded start or departure time is counted as present
replace couns_present_yn = 1 if !(missing(couns_starttime) & missing(couns_deprt_time))

* Duration is unknown when presence is unknown, and 0 when the health worker was absent
replace couns_duration = . if couns_present_yn == .
replace couns_duration = 0 if couns_present_yn == 0


** Extracting information regarding Operation ASHA visits, for merge with non-baseline data (in hw_effort.do)
* One row per monitoring instance is kept and flagged with baseline = 1; the data in memory are restored afterwards.

preserve

	keep if row_id == 1
	keep uid_cluster opasha_visit_yn final_stratum_id1-final_stratum_id13 treatment
	rename opasha_visit_yn opasha_visit_day
	gen baseline = 1
	save "Data/Intermediate/baseline_opasha_visit.dta", replace

restore


** Creating cluster-level variables

* Center- and health-worker-level outcomes: cluster averages over the first row of each monitoring
* instance; the averages are only filled in on rows with row_id == 1
foreach outcome in center_open_yn cntr_open_duration_day couns_present_yn couns_duration opasha_visit_yn {
	bysort uid_cluster: egen baseline_`outcome' = mean(`outcome') if row_id == 1
}
* id == 1 marks the first row of each cluster once rows are ordered by row_id
sort uid_cluster row_id
by uid_cluster: gen id = _n

* Patient-level outcomes: cluster averages over patient rows; only filled in on rows with patient_row == 1
foreach outcome in adherence_1 adherence_2 {
	bysort uid_cluster: egen baseline_`outcome' = mean(`outcome') if patient_row == 1
}
* id2 == 1 marks the first row of each cluster once rows are ordered by patient_row
* NOTE(review): that row carries the averages only if patient_row == 1 sorts first within the cluster
* (i.e. patient_row has no value below 1) - confirm
sort uid_cluster patient_row
by uid_cluster: gen id2 = _n

* Result columns: row k of these variables holds the statistics of the k-th variable tested
gen var = ""
foreach stat in diff p control_mean control_sd treatment_mean treatment_sd fullsample_mean fullsample_sd N treatment_control {
	gen `stat' = .
}


** Balance checks on observation days outcomes: Table 1, Panel C

* Center- and health-worker-level outcomes
* Uses the result columns (var, diff, p, ...) created earlier in this file.

* Work on one row per cluster; the full data are restored afterwards
preserve
keep if id == 1

* Number of variables tested, i.e. number of rows exported
local vars = 5

local row = 0
foreach v in baseline_center_open_yn baseline_cntr_open_duration_day baseline_couns_present_yn baseline_couns_duration baseline_opasha_visit_yn {
	local ++row
	replace var = "`v'" if _n == `row'

	* Treatment coefficient and its p-value, controlling for the strata dummies, SEs clustered by uid_cluster
	ivreg2 `v' treatment final_stratum_id1-final_stratum_id13 if id == 1, cl(uid_cluster) small
	matrix beta = e(b)
	replace diff = beta[1,1] if _n == `row'
	test treatment = 0
	replace p = r(p) if _n == `row'

	* Control group mean and standard deviation
	summarize `v' if treatment == 0 & id == 1
	replace control_mean = r(mean) if _n == `row'
	replace control_sd = r(sd) if _n == `row'

	* Treatment group mean and standard deviation
	summarize `v' if treatment == 1 & id == 1
	replace treatment_mean = r(mean) if _n == `row'
	replace treatment_sd = r(sd) if _n == `row'

	* Raw difference in means (recomputed for every row; rows not yet filled stay missing)
	replace treatment_control = treatment_mean - control_mean

	* Full sample size, mean and standard deviation
	summarize `v' if (treatment == 0 | treatment == 1) & id == 1
	replace N = r(N) if _n == `row'
	replace fullsample_mean = r(mean) if _n == `row'
	replace fullsample_sd = r(sd) if _n == `row'
}

* The variable list must contain exactly `vars' names
assert `row' == `vars'

* Round the statistics to three decimals
* NOTE(review): N and treatment_control lie outside this variable range, so treatment_control is exported unrounded
foreach stat of varlist diff-fullsample_sd {
	replace `stat' = round(`stat', 0.001)
}

outsheet var fullsample_mean fullsample_sd N control_mean control_sd treatment_mean treatment_sd treatment_control p if _n<=`vars' using "Results/Paper/Table1_PanelC_1.out", replace

restore

* Preparing data for joint orthogonality test
* One row per cluster with treatment, the strata dummies and the five baseline outcomes

preserve

	keep if id == 1
	keep uid_cluster treatment baseline_center_open_yn baseline_cntr_open_duration_day baseline_couns_present_yn baseline_couns_duration baseline_opasha_visit_yn final_stratum_id1-final_stratum_id13
	save "Data/Intermediate/balance_check_Table1_PanelC1.dta", replace

restore

* Patient-level outcomes
* Uses the result columns (var, diff, p, ...) created earlier in this file.

* Work on one row per cluster; the full data are restored afterwards
preserve
keep if id2 == 1

* Number of variables tested, i.e. number of rows exported
local vars = 2

local row = 0
foreach v in baseline_adherence_1 baseline_adherence_2 {
	local ++row
	replace var = "`v'" if _n == `row'

	* Treatment coefficient and its p-value, controlling for the strata dummies, SEs clustered by uid_cluster
	ivreg2 `v' treatment final_stratum_id1-final_stratum_id13 if id2 == 1, cl(uid_cluster) small
	matrix beta = e(b)
	replace diff = beta[1,1] if _n == `row'
	test treatment = 0
	replace p = r(p) if _n == `row'

	* Control group mean and standard deviation
	summarize `v' if treatment == 0 & id2 == 1
	replace control_mean = r(mean) if _n == `row'
	replace control_sd = r(sd) if _n == `row'

	* Treatment group mean and standard deviation
	summarize `v' if treatment == 1 & id2 == 1
	replace treatment_mean = r(mean) if _n == `row'
	replace treatment_sd = r(sd) if _n == `row'

	* Raw difference in means (recomputed for every row; rows not yet filled stay missing)
	replace treatment_control = treatment_mean - control_mean

	* Full sample size, mean and standard deviation
	summarize `v' if (treatment == 0 | treatment == 1) & id2 == 1
	replace N = r(N) if _n == `row'
	replace fullsample_mean = r(mean) if _n == `row'
	replace fullsample_sd = r(sd) if _n == `row'
}

* The variable list must contain exactly `vars' names
assert `row' == `vars'

* Round the statistics to three decimals
* NOTE(review): N and treatment_control lie outside this variable range, so treatment_control is exported unrounded
foreach stat of varlist diff-fullsample_sd {
	replace `stat' = round(`stat', 0.001)
}

outsheet var fullsample_mean fullsample_sd N control_mean control_sd treatment_mean treatment_sd treatment_control p if _n<=`vars' using "Results/Paper/Table1_PanelC_2.out", replace

restore

* Preparing data for joint orthogonality test
* One row per cluster with the two baseline adherence outcomes

preserve

	keep if id2 == 1
	keep uid_cluster baseline_adherence_1 baseline_adherence_2
	save "Data/Intermediate/balance_check_Table1_PanelC2.dta", replace

restore

* Joint orthogonality test on center-, health-worker-, and patient-level outcomes: reported in Section 3.3
* The two cluster-level files saved above are combined by uid_cluster; treatment is regressed on the
* strata dummies and all seven baseline outcomes, which are then tested jointly.
* The observation-days data in memory are restored afterwards.

preserve

	use "Data/Intermediate/balance_check_Table1_PanelC1.dta", clear

	merge 1:1 uid_cluster using "Data/Intermediate/balance_check_Table1_PanelC2.dta"
	drop _merge

	local baseline_outcomes baseline_center_open_yn baseline_cntr_open_duration_day baseline_couns_present_yn baseline_couns_duration baseline_opasha_visit_yn baseline_adherence_1 baseline_adherence_2

	ivreg2 treatment final_stratum_id1-final_stratum_id13 `baseline_outcomes', cl(uid_cluster) small
	test `baseline_outcomes'

restore


** Checking that attrition is not differential across treatment and control: Table A1, Panel B, columns 1-6, third set of variables

* Center- and health-worker-level outcomes
* Uses the result columns (var, diff, p, ...) created earlier in this file; rows 1 to `vars' are overwritten.

* Number of variables tested, i.e. number of rows exported
local vars = 5

local row = 0
foreach v in baseline_center_open_yn baseline_cntr_open_duration_day baseline_couns_present_yn baseline_couns_duration baseline_opasha_visit_yn {
	local ++row
	replace var = "`v'" if _n == `row'

	* Treatment-control difference among clusters with some attrition, one row per cluster (no strata dummies)
	ivreg2 `v' treatment if some_attrition == 1 & id == 1, cl(uid_cluster) small
	replace N = e(N) if _n == `row'
	matrix beta = e(b)
	replace diff = beta[1,1] if _n == `row'
	test treatment = 0
	replace p = r(p) if _n == `row'

	* Control clusters with some attrition
	summarize `v' if treatment == 0 & some_attrition == 1 & id == 1
	replace control_mean = r(mean) if _n == `row'
	replace control_sd = r(sd) if _n == `row'

	* Treatment clusters with some attrition
	summarize `v' if treatment == 1 & some_attrition == 1 & id == 1
	replace treatment_mean = r(mean) if _n == `row'
	replace treatment_sd = r(sd) if _n == `row'
}

* The variable list must contain exactly `vars' names
assert `row' == `vars'

* Round the statistics to three decimals
foreach stat of varlist diff-treatment_sd {
	replace `stat' = round(`stat', 0.001)
}

outsheet var control_mean control_sd treatment_mean treatment_sd p N if _n<=`vars' using "Results/Appendix/TableA1_PanelB_3.out", replace

* Patient-level outcomes
* Uses the result columns (var, diff, p, ...) created earlier in this file; rows 1 to `vars' are overwritten.

* Number of variables tested, i.e. number of rows exported
local vars = 2

local row = 0
foreach v in baseline_adherence_1 baseline_adherence_2 {
	local ++row
	replace var = "`v'" if _n == `row'

	* Treatment-control difference among clusters with some attrition, one row per cluster (no strata dummies)
	ivreg2 `v' treatment if some_attrition == 1 & id2 == 1, cl(uid_cluster) small
	replace N = e(N) if _n == `row'
	matrix beta = e(b)
	replace diff = beta[1,1] if _n == `row'
	test treatment = 0
	replace p = r(p) if _n == `row'

	* Control clusters with some attrition
	summarize `v' if treatment == 0 & some_attrition == 1 & id2 == 1
	replace control_mean = r(mean) if _n == `row'
	replace control_sd = r(sd) if _n == `row'

	* Treatment clusters with some attrition
	summarize `v' if treatment == 1 & some_attrition == 1 & id2 == 1
	replace treatment_mean = r(mean) if _n == `row'
	replace treatment_sd = r(sd) if _n == `row'
}

* The variable list must contain exactly `vars' names
assert `row' == `vars'

* Round the statistics to three decimals
foreach stat of varlist diff-treatment_sd {
	replace `stat' = round(`stat', 0.001)
}

outsheet var control_mean control_sd treatment_mean treatment_sd p N if _n<=`vars' using "Results/Appendix/TableA1_PanelB_4.out", replace


** Statistics for Figure 2
* The tabulations below are only displayed; nothing is saved.

* Number of areas by treatment group

use "Data/Intermediate/hw_roster_by_center.dta", clear
tabulate Couns_Type treatment
// The counts for mobile areas need to be multiplied by 2

* Number of health workers by treatment group: initial health workers, replacements, then everyone

use "Data/Intermediate/hw_roster_by_hw.dta", clear
tabulate treatment if replacement == 0
tabulate treatment if replacement == 1
tabulate treatment

* Number of observations per stratum (initial health workers only)

tabulate final_stratum if replacement == 0
